#############################################################################
# FigureC4.R
# Aim: to replicate Figure C4 in Atsusaka and Stevenson (2021)
# Run time: about 5 seconds
#############################################################################

rm(list=ls())
start <- Sys.time()
library(tidyverse)

# Installing cWise to get bs.est function
library(devtools)
devtools::install_github("YukiAtsusaka/cWise")
library(cWise)

#############################################################################
# (2) DATA GENERATING PROCESS
#############################################################################
i.logit <- function(XB){ exp(XB)/(1 + exp(XB))}
set.seed(1234)

N = 4000
beta  = c(-1.5, 0.5, 0.02)                           # Unknown parameter
theta = c(2, -0.1, -0.01)                            # Unknown parameter
female = rbinom(n=N, size=1, prob=0.5)               # 50% Male, 50% Female
age = rpois(n=N, lambda=30)                          # Age with mean 30
x = as.matrix(data.frame(rep(1, N), female, age))

XB =  x %*% beta                                     # Linear aggregator with betas
XT =  x %*% theta                                    # Linear aggregator with thetas
pi.x    = i.logit(XB)                                # Pi|Male
gamma.x = i.logit(XT)                                # Gamma|Male

p = 0.1                                              # Randomization probability
p2 = 0.15                                            # Randomization probability for the anchor question
Attentive = rbinom(n=N, size=1, prob=gamma.x)        # Attentive R or not

# glm(pi.x~x, family=binomial)                       # CHECK
# glm(gamma.x~x, family=binomial)                    # CHECK


# SENSITIVE QUESTION OF INTERST
StatementA = rbinom(n=N, size=1, prob=pi.x)          # Sesitive item
StatementB = rbinom(n=N, size=1, prob=p)             # Randomization item
True.res = ifelse(StatementA != StatementB, 0, 1)    # Survey answer 

Obs.res = rep(NA, N)
Obs.res[Attentive==1] = True.res[Attentive==1]       # Observed answer for attentive Rs
Obs.res[Attentive==0] = rbinom(n=length(N[Attentive==0]),
                               size=1, prob=0.5)     # Observed answer, otherwise


# NON-SENSITIVE ANCHOR QUESTION
StatementC = 0                                       # Attention "c"heck item (True prevalence is 0 for everyone)
StatementD = rbinom(n=N, size=1, prob=p2)            # Anchor question
True.res.prime = ifelse(StatementC != StatementD, 0, 1) # Survey answer in Anchor Question

Obs.res.prime = rep(NA, N)
Obs.res.prime[Attentive==1] = True.res.prime[Attentive==1] # Observed answer for attentive Rs
Obs.res.prime[Attentive==0] = rbinom(n=length(N[Attentive==0]),
                                     size=1, prob=0.5)# Observed answer, otherwise

Y = Obs.res
A = Obs.res.prime

dat <- cbind(Y, female, age, A, p, p2)                          # Observed data
dat <- as_tibble(dat)
head(dat) 


#############################################################################
# (2) RESULTS
#############################################################################

m <- cmreg(Y~female+age+A, p=0.1, p.prime=0.15, data=dat)
m

pred.nonfem = cmpredict(out=m, typical=30, zval=0)
pred.female = cmpredict(out=m, typical=30, zval=1)

#############################################################################
# (3) COMPARE THE RESULTS WITH GROUND TRUTH 
#############################################################################

pdf("FigureC4.pdf", width=10, height=4.5)
par(mfrow=c(1,2),mar=c(3,4,3,1), oma=c(2,0,0,0))
hist(pi.x[female==1], breaks=40, xlim=c(0.2,0.5), ylim=c(0,200), 
     col="firebrick4", 
     xlab="Proportion of Sensitive Attributes", main="Ground Truth")
hist(pi.x[female==0], breaks=40, add=T)
text(0.22,100, labels=expression(paste(X[1]), "         =0"))
text(0.45,100, labels=expression(paste(X[1]), "         =1"), col="firebrick4")
hist(pred.female, breaks=40, xlim=c(0.2,0.5),  col="firebrick4",
     xlab="Proportion of Sensitive Attributes", main="Simulated Predicted Values")
hist(pred.nonfem, breaks=40, add=T)
text(0.22,600, labels=expression(paste(X[1]), "         =0"))
text(0.45,600, labels=expression(paste(X[1]), "         =1"), col="firebrick4")
mtext(text="Proportion of Sensitive Attributes",side=1,line=0,outer=TRUE)

dev.off()

end <- Sys.time()
start - end

#############################################################################
# END OF THIS R SOURCE FILE
#############################################################################